# R Markdown front matter (kept as comments so this file parses as R):
# ---
# title: "EDA"
# output:
#   html_document: default
#   word_document: default
# date: "2023-11-21"
# ---
library(tidyverse)
library(lubridate)
library(ggplot2)
library(cowplot)
library(dplyr)
library(DT)
library(GGally)
# Load the raw inputs ----
maindata <- read.csv("maindata.csv")
data_monthly <- read.csv("data_monthly.csv")

# One date per row: the first day of every month, Jan 1990 through Nov 2023.
dates <- seq(ymd("1990-01-01"), ymd("2023-11-01"), by = "1 month")

# The dates are attached purely by row position, so fail with a clear message
# if the file does not contain exactly one row per month.
stopifnot(
  "maindata must have one row per month from 1990-01 to 2023-11" =
    nrow(maindata) == length(dates)
)
maindata <- maindata %>%
  mutate(date = dates)

# Reshape to long format (date, variable, value) for ggplot2.
# Every column except `date` is stacked. Only NaN values are dropped here;
# plain NA values, if any, are kept.
# NOTE(review): an earlier comment said 'uvindex' and 'solarradiation' are
# excluded because of NaN values; the code only removes the NaN rows, not
# those columns as a whole -- confirm which was intended.
maindata_long <- maindata %>%
  pivot_longer(cols = -date, names_to = "variable", values_to = "value") %>%
  filter(!is.nan(value))
# Annual mean temperature ----
# Keep only the "temp" series from the long table and tag each row with its
# calendar year.
maindata_temp <- maindata_long %>%
  filter(variable == "temp") %>%
  mutate(year = year(date))

# One mean per year. `variable` is part of the grouping only so that the
# column is carried into the summary; `.groups = "drop"` returns an
# ungrouped result directly.
annual_mean_temp <- maindata_temp %>%
  group_by(year, variable) %>%
  summarise(year_mean = mean(value, na.rm = TRUE), .groups = "drop")

# Points, a connecting line and a loess trend. The summary already contains
# only "temp", so no further filtering is needed before plotting.
# NOTE(review): per the subtitle, 2023 covers Jan-Nov only, so its annual mean
# is not strictly comparable with the full years.
annual_mean_temp_plot <- annual_mean_temp %>%
  ggplot(aes(x = year, y = year_mean)) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "loess") +
  theme(
    axis.title.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  labs(
    title = "Annual Mean Temperature",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    y = "Degrees Celsius"
  )
annual_mean_temp_plot
# Monthly mean temperature, one facet per calendar month ----
# Tag every "temp" observation with its year and an abbreviated month label.
maindata_temp <- maindata_long |>
  filter(variable == "temp") |>
  mutate(
    year = year(date),
    month = month(date, label = TRUE)
  )

# Average the observations that fall into each year/month cell.
monthly_mean_temp <- maindata_temp |>
  group_by(year, month) |>
  summarise(month_mean = mean(value, na.rm = TRUE)) |>
  ungroup()

# Twelve default ggplot2 hues, one per month.
my_colour <- scales::hue_pal()(12)

# Year-on-year trend within each month (points plus loess smoother).
temp_plot <- monthly_mean_temp |>
  ggplot(aes(x = year, y = month_mean, colour = month)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess") +
  facet_wrap(~month) +
  scale_colour_manual(values = my_colour) +
  labs(
    y = "Degrees Celsius",
    title = "Monthly Mean Temperature",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    legend.position = "none"
  )
temp_plot
# Annual mean humidity ----
# Keep only the "humidity" series and tag each row with its calendar year.
maindata_humidity <- maindata_long %>%
  filter(variable == "humidity") %>%
  mutate(year = year(date))

# One mean per year; `variable` is grouped on only to keep the column.
annual_mean_humidity <- maindata_humidity %>%
  group_by(year, variable) %>%
  summarise(year_mean = mean(value, na.rm = TRUE), .groups = "drop")

# Points, a connecting line and a loess trend, with centred title/subtitle.
# The summary already holds only "humidity", so it is plotted as is.
annual_mean_humidity_plot <- annual_mean_humidity %>%
  ggplot(aes(x = year, y = year_mean)) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "loess") +
  theme(
    axis.title.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  labs(
    title = "Annual Mean Humidity",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    y = "%"
  )
annual_mean_humidity_plot
# Monthly mean humidity, one facet per calendar month ----
# Tag every "humidity" observation with its year and month label.
maindata_humidity <- maindata_long %>%
  filter(variable == "humidity") %>%
  mutate(year = year(date), month = month(date, label = TRUE))

# Mean humidity for each year/month cell.
monthly_mean_humidity <- maindata_humidity %>%
  group_by(year, month) %>%
  summarise(month_mean = mean(value, na.rm = TRUE), .groups = "drop")

# Twelve default ggplot2 hues, one per month.
my_colour <- scales::hue_pal()(12)

# Year-on-year trend within each month. The y axis is labelled "%", matching
# the annual humidity plot; it previously read "Degrees Celsius".
humidity_plot <- monthly_mean_humidity %>%
  ggplot(aes(x = year, y = month_mean, colour = month)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.title.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  scale_color_manual(values = my_colour) +
  labs(
    title = "Monthly Mean humidity",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    y = "%"
  ) +
  facet_wrap(~month)
humidity_plot
# Annual mean wind speed ----
# Keep only the "windspeed" series and tag each row with its calendar year.
maindata_windspeed <- maindata_long %>%
  filter(variable == "windspeed") %>%
  mutate(year = year(date))

# One mean per year; `variable` is grouped on only to keep the column.
annual_mean_windspeed <- maindata_windspeed %>%
  group_by(year, variable) %>%
  summarise(year_mean = mean(value, na.rm = TRUE), .groups = "drop")

# Points, a connecting line and a loess trend, with centred title/subtitle.
# The summary already holds only "windspeed", so it is plotted as is.
annual_mean_windspeed_plot <- annual_mean_windspeed %>%
  ggplot(aes(x = year, y = year_mean)) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "loess") +
  theme(
    axis.title.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  labs(
    title = "Annual Mean Wind Speed",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    y = "Kilometers per hour (kph) "
  )
annual_mean_windspeed_plot
# Monthly mean wind speed, one facet per calendar month ----
# Tag every "windspeed" observation with its year and month label.
maindata_windspeed <- maindata_long |>
  filter(variable == "windspeed") |>
  mutate(
    year = year(date),
    month = month(date, label = TRUE)
  )

# Mean wind speed for each year/month cell.
monthly_mean_windspeed <- maindata_windspeed |>
  group_by(year, month) |>
  summarise(month_mean = mean(value, na.rm = TRUE)) |>
  ungroup()

# Twelve default ggplot2 hues, one per month.
my_colour <- scales::hue_pal()(12)

# Year-on-year trend within each month (points plus loess smoother).
windspeed_plot <- monthly_mean_windspeed |>
  ggplot(aes(x = year, y = month_mean, colour = month)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess") +
  facet_wrap(~month) +
  scale_colour_manual(values = my_colour) +
  labs(
    y = "Kilometers per hour (kph) ",
    title = "Monthly Mean Wind Speed",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    legend.position = "none"
  )
windspeed_plot
# Annual mean cloud cover ----
# Keep only the "cloudcover" series and tag each row with its calendar year.
maindata_cloudcover <- maindata_long %>%
  filter(variable == "cloudcover") %>%
  mutate(year = year(date))

# One mean per year; `variable` is grouped on only to keep the column.
annual_mean_cloudcover <- maindata_cloudcover %>%
  group_by(year, variable) %>%
  summarise(year_mean = mean(value, na.rm = TRUE), .groups = "drop")

# Points, a connecting line and a loess trend, with centred title/subtitle.
# The summary already holds only "cloudcover", so it is plotted as is.
annual_mean_cloudcover_plot <- annual_mean_cloudcover %>%
  ggplot(aes(x = year, y = year_mean)) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "loess") +
  theme(
    axis.title.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  labs(
    title = "Annual Mean Cloud Cover",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    y = "% "
  )
annual_mean_cloudcover_plot
# Monthly mean cloud cover, one facet per calendar month ----
# Tag every "cloudcover" observation with its year and month label.
maindata_cloudcover <- maindata_long |>
  filter(variable == "cloudcover") |>
  mutate(
    year = year(date),
    month = month(date, label = TRUE)
  )

# Mean cloud cover for each year/month cell.
monthly_mean_cloudcover <- maindata_cloudcover |>
  group_by(year, month) |>
  summarise(month_mean = mean(value, na.rm = TRUE)) |>
  ungroup()

# Twelve default ggplot2 hues, one per month.
my_colour <- scales::hue_pal()(12)

# Year-on-year trend within each month (points plus loess smoother).
cloudcover_plot <- monthly_mean_cloudcover |>
  ggplot(aes(x = year, y = month_mean, colour = month)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess") +
  facet_wrap(~month) +
  scale_colour_manual(values = my_colour) +
  labs(
    y = "%",
    title = "Monthly Mean Cloud Cover",
    subtitle = "Data from 1990 - 2023"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    legend.position = "none"
  )
cloudcover_plot
# Annual mean sea level pressure ----
# Keep only the "sealevelpressure" series and tag each row with its year.
maindata_sealevelpressure <- maindata_long %>%
  filter(variable == "sealevelpressure") %>%
  mutate(year = year(date))

# One mean per year; `variable` is grouped on only to keep the column.
annual_mean_sealevelpressure <- maindata_sealevelpressure %>%
  group_by(year, variable) %>%
  summarise(year_mean = mean(value, na.rm = TRUE), .groups = "drop")

# Points, a connecting line and a loess trend, with centred title/subtitle.
# The summary already holds only "sealevelpressure", so it is plotted as is.
annual_mean_sealevelpressure_plot <- annual_mean_sealevelpressure %>%
  ggplot(aes(x = year, y = year_mean)) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "loess") +
  theme(
    axis.title.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  labs(
    title = "Annual Mean Sea Level Pressure",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    y = "Millibars (mb)"
  )
annual_mean_sealevelpressure_plot
# Monthly mean sea level pressure, one facet per calendar month ----
# Tag every "sealevelpressure" observation with its year and month label.
maindata_sealevelpressure <- maindata_long |>
  filter(variable == "sealevelpressure") |>
  mutate(
    year = year(date),
    month = month(date, label = TRUE)
  )

# Mean sea level pressure for each year/month cell.
monthly_mean_sealevelpressure <- maindata_sealevelpressure |>
  group_by(year, month) |>
  summarise(month_mean = mean(value, na.rm = TRUE)) |>
  ungroup()

# Twelve default ggplot2 hues, one per month.
my_colour <- scales::hue_pal()(12)

# Year-on-year trend within each month (points plus loess smoother).
sealevelpressure_plot <- monthly_mean_sealevelpressure |>
  ggplot(aes(x = year, y = month_mean, colour = month)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess") +
  facet_wrap(~month) +
  scale_colour_manual(values = my_colour) +
  labs(
    y = "Millibars (mb)",
    title = "Monthly Mean Sea Level Pressure",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    legend.position = "none"
  )
sealevelpressure_plot
# Annual mean precipitation ----
# Keep only the "precip" series and tag each row with its calendar year.
maindata_precip <- maindata_long %>%
  filter(variable == "precip") %>%
  mutate(year = year(date))

# One mean per year; `variable` is grouped on only to keep the column.
annual_mean_precip <- maindata_precip %>%
  group_by(year, variable) %>%
  summarise(year_mean = mean(value, na.rm = TRUE), .groups = "drop")

# Points, a connecting line and a loess trend, with centred title/subtitle.
# The summary already holds only "precip", so it is plotted as is.
annual_mean_precip_plot <- annual_mean_precip %>%
  ggplot(aes(x = year, y = year_mean)) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "loess") +
  theme(
    axis.title.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  labs(
    title = "Annual Mean Precipitation",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    y = "mm"
  )
annual_mean_precip_plot
# Monthly precipitation, one facet per calendar month ----
# Tag every "precip" observation with its year and month label.
maindata_precip <- maindata_long |>
  filter(variable == "precip") |>
  mutate(
    year = year(date),
    month = month(date, label = TRUE)
  )

# Mean precipitation for each year/month cell.
monthly_mean_precip <- maindata_precip |>
  group_by(year, month) |>
  summarise(month_mean = mean(value, na.rm = TRUE)) |>
  ungroup()

# Twelve default ggplot2 hues, one per month.
my_colour <- scales::hue_pal()(12)

# Year-on-year trend within each month (points plus loess smoother).
precip_plot <- monthly_mean_precip |>
  ggplot(aes(x = year, y = month_mean, colour = month)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess") +
  facet_wrap(~month) +
  scale_colour_manual(values = my_colour) +
  labs(
    y = "mm",
    title = "Monthly Precipitation",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    legend.position = "none"
  )
precip_plot
# Temperature by month, coloured by year ----
# Jittered monthly temperatures from `data_monthly` with a loess trend line.
# "smooth" is not one of the fitting methods geom_smooth() accepts ("lm",
# "glm", "gam", "loess", or a model function), so the smoothing layer could
# not be computed; "loess" matches the smoother used in the rest of the file.
# NOTE(review): assumes `month` is numeric in data_monthly -- TODO confirm.
data_monthly |>
  ggplot(aes(x = month, y = temp, col = year)) +
  geom_jitter() +
  geom_smooth(method = "loess") +
  theme_cowplot() +
  xlab("Month") +
  ylab("Temperature")
# Monthly boxplots of temperature ----
# Label each "temp" observation with its calendar month so that the same
# month from every year falls into one box.
temp_data <- maindata_long |>
  filter(variable == "temp") |>
  mutate(month = factor(month(date, label = TRUE)))

# One box per month; title and subtitle are centred.
temp_boxplot <- temp_data |>
  ggplot(aes(x = month, y = value)) +
  geom_boxplot() +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    x = "Month",
    y = "Degrees Celsius",
    title = "Monthly Boxplots of Temperature",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  )
print(temp_boxplot)
## Boxplot: temperature, with outlying years labelled
# Year is stored as text so it can be used directly as a label.
temp_data <- maindata_long |>
  filter(variable == "temp") |>
  mutate(
    year = as.character(year(date)),
    month = factor(month(date, label = TRUE))
  )

# Per-month fences at 1.5 * IQR beyond the quartiles; observations outside
# the fences are joined back in and kept as outliers.
outliers <- temp_data |>
  group_by(month) |>
  summarise(
    lower = quantile(value, 0.25) - 1.5 * IQR(value),
    upper = quantile(value, 0.75) + 1.5 * IQR(value)
  ) |>
  left_join(temp_data, by = "month") |>
  filter(value < lower | value > upper)

# Same boxplot as before, plus the year printed just above each outlier.
temp_boxplot_outlier <- temp_data |>
  ggplot(aes(x = month, y = value)) +
  geom_boxplot() +
  geom_text(data = outliers, mapping = aes(label = year), vjust = -0.5) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    x = "Month",
    y = "Degrees Celsius",
    title = "Monthly Boxplots of Temperature",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  )
print(temp_boxplot_outlier)
# Monthly boxplots of humidity ----
# Label each "humidity" observation with its calendar month.
humidity_data <- maindata_long %>%
  filter(variable == "humidity") %>%
  mutate(month = factor(month(date, label = TRUE)))

# Plot humidity_data (not temp_data) so the boxes match the title and the
# "%" axis label.
humidity_boxplot <- ggplot(humidity_data, aes(x = month, y = value)) +
  geom_boxplot() +
  labs(
    title = "Monthly Boxplots of Humidity",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    x = "Month",
    y = "%"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )
print(humidity_boxplot)
## Boxplot: humidity, with outlying years labelled
# Year is stored as text so it can be used directly as a label.
humidity_data <- maindata_long |>
  filter(variable == "humidity") |>
  mutate(
    year = as.character(year(date)),
    month = factor(month(date, label = TRUE))
  )

# Per-month fences at 1.5 * IQR beyond the quartiles; observations outside
# the fences are joined back in and kept as outliers.
outliers <- humidity_data |>
  group_by(month) |>
  summarise(
    lower = quantile(value, 0.25) - 1.5 * IQR(value),
    upper = quantile(value, 0.75) + 1.5 * IQR(value)
  ) |>
  left_join(humidity_data, by = "month") |>
  filter(value < lower | value > upper)

# Boxplot per month, plus the year printed just above each outlier.
humidity_boxplot_outlier <- humidity_data |>
  ggplot(aes(x = month, y = value)) +
  geom_boxplot() +
  geom_text(data = outliers, mapping = aes(label = year), vjust = -0.5) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    x = "Month",
    y = "%",
    title = "Monthly Boxplots of Humidity",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  )
print(humidity_boxplot_outlier)
## Boxplot: windspeed
# Label each "windspeed" observation with its calendar month.
windspeed_data <- maindata_long %>%
  filter(variable == "windspeed") %>%
  mutate(month = factor(month(date, label = TRUE)))

# Plot windspeed_data (not temp_data) so the boxes match the title and the
# kph axis label.
windspeed_boxplot <- ggplot(windspeed_data, aes(x = month, y = value)) +
  geom_boxplot() +
  labs(
    title = "Monthly Boxplots of Wind Speed",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    x = "Month",
    y = "Kilometers per hour (kph)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )
print(windspeed_boxplot)
## Boxplot: wind speed, with outlying years labelled
# Year is stored as text so it can be used directly as a label.
windspeed_data <- maindata_long |>
  filter(variable == "windspeed") |>
  mutate(
    year = as.character(year(date)),
    month = factor(month(date, label = TRUE))
  )

# Per-month fences at 1.5 * IQR beyond the quartiles; observations outside
# the fences are joined back in and kept as outliers.
outliers <- windspeed_data |>
  group_by(month) |>
  summarise(
    lower = quantile(value, 0.25) - 1.5 * IQR(value),
    upper = quantile(value, 0.75) + 1.5 * IQR(value)
  ) |>
  left_join(windspeed_data, by = "month") |>
  filter(value < lower | value > upper)

# Boxplot per month, plus the year printed just above each outlier.
windspeed_boxplot_outlier <- windspeed_data |>
  ggplot(aes(x = month, y = value)) +
  geom_boxplot() +
  geom_text(data = outliers, mapping = aes(label = year), vjust = -0.5) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    x = "Month",
    y = "Kilometers per hour (kph)",
    title = "Monthly Boxplots of Wind Speed",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  )
print(windspeed_boxplot_outlier)
# Monthly boxplots of cloud cover ----
# Label each "cloudcover" observation with its calendar month.
cloudcover_data <- maindata_long %>%
  filter(variable == "cloudcover") %>%
  mutate(month = factor(month(date, label = TRUE)))

# Plot cloudcover_data (not temp_data) so the boxes match the title and the
# "%" axis label.
cloudcover_boxplot <- ggplot(cloudcover_data, aes(x = month, y = value)) +
  geom_boxplot() +
  labs(
    title = "Monthly Boxplots of Cloud Cover",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    x = "Month",
    y = "%"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )
print(cloudcover_boxplot)
## Boxplot: cloud cover, with outlying years labelled
# Year is stored as text so it can be used directly as a label.
cloudcover_data <- maindata_long |>
  filter(variable == "cloudcover") |>
  mutate(
    year = as.character(year(date)),
    month = factor(month(date, label = TRUE))
  )

# Per-month fences at 1.5 * IQR beyond the quartiles; observations outside
# the fences are joined back in and kept as outliers.
outliers <- cloudcover_data |>
  group_by(month) |>
  summarise(
    lower = quantile(value, 0.25) - 1.5 * IQR(value),
    upper = quantile(value, 0.75) + 1.5 * IQR(value)
  ) |>
  left_join(cloudcover_data, by = "month") |>
  filter(value < lower | value > upper)

# Boxplot per month, plus the year printed just above each outlier.
cloudcover_boxplot_outlier <- cloudcover_data |>
  ggplot(aes(x = month, y = value)) +
  geom_boxplot() +
  geom_text(data = outliers, mapping = aes(label = year), vjust = -0.5) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    x = "Month",
    y = "%",
    title = "Monthly Boxplots of Cloud Cover",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  )
print(cloudcover_boxplot_outlier)
# Monthly boxplots of sea level pressure ----
# Label each "sealevelpressure" observation with its calendar month.
sealevelpressure_data <- maindata_long %>%
  filter(variable == "sealevelpressure") %>%
  mutate(month = factor(month(date, label = TRUE)))

# Plot sealevelpressure_data (not temp_data) so the boxes match the title
# and the millibar axis label.
sealevelpressure_boxplot <- sealevelpressure_data %>%
  ggplot(aes(x = month, y = value)) +
  geom_boxplot() +
  labs(
    title = "Monthly Boxplots of Sea Level Pressure",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    x = "Month",
    y = "Millibars (mb)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )
print(sealevelpressure_boxplot)
## Boxplot: sea level pressure, with outlying years labelled
# Year is stored as text so it can be used directly as a label.
sealevelpressure_data <- maindata_long |>
  filter(variable == "sealevelpressure") |>
  mutate(
    year = as.character(year(date)),
    month = factor(month(date, label = TRUE))
  )

# Per-month fences at 1.5 * IQR beyond the quartiles; observations outside
# the fences are joined back in and kept as outliers.
outliers <- sealevelpressure_data |>
  group_by(month) |>
  summarise(
    lower = quantile(value, 0.25) - 1.5 * IQR(value),
    upper = quantile(value, 0.75) + 1.5 * IQR(value)
  ) |>
  left_join(sealevelpressure_data, by = "month") |>
  filter(value < lower | value > upper)

# Boxplot per month, plus the year printed just above each outlier.
sealevelpressure_boxplot_outlier <- sealevelpressure_data |>
  ggplot(aes(x = month, y = value)) +
  geom_boxplot() +
  geom_text(data = outliers, mapping = aes(label = year), vjust = -0.5) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    x = "Month",
    y = "Millibars (mb)",
    title = "Monthly Boxplots of Sea Level Pressure",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  )
print(sealevelpressure_boxplot_outlier)
# Monthly boxplots of precipitation ----
# Label each "precip" observation with its calendar month.
precip_data <- maindata_long %>%
  filter(variable == "precip") %>%
  mutate(month = factor(month(date, label = TRUE)))

# Plot precip_data (not temp_data) so the boxes match the title and the
# "mm" axis label.
precip_boxplot <- ggplot(precip_data, aes(x = month, y = value)) +
  geom_boxplot() +
  labs(
    title = "Monthly Boxplots of Precipitation",
    subtitle = "Data from Jan. 1990 to Nov. 2023",
    x = "Month",
    y = "mm"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5)
  )
print(precip_boxplot)
## Boxplot: precipitation, with outlying years labelled
# Year is stored as text so it can be used directly as a label.
precip_data <- maindata_long |>
  filter(variable == "precip") |>
  mutate(
    year = as.character(year(date)),
    month = factor(month(date, label = TRUE))
  )

# Per-month fences at 1.5 * IQR beyond the quartiles, computed with missing
# values ignored; observations outside the fences are kept as outliers.
outliers <- precip_data |>
  group_by(month) |>
  summarise(
    lower = quantile(value, 0.25, na.rm = TRUE) -
      1.5 * IQR(value, na.rm = TRUE),
    upper = quantile(value, 0.75, na.rm = TRUE) +
      1.5 * IQR(value, na.rm = TRUE)
  ) |>
  left_join(precip_data, by = "month") |>
  filter(value < lower | value > upper)

# Boxplot per month, plus the year printed just above each outlier.
precip_boxplot_outlier <- precip_data |>
  ggplot(aes(x = month, y = value)) +
  geom_boxplot() +
  geom_text(data = outliers, mapping = aes(label = year), vjust = -0.5) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    x = "Month",
    y = "mm",
    title = "Monthly Boxplots of Precipitation",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  )
print(precip_boxplot_outlier)
# Annual mean frequency of each weather condition ----
# Keep only the six weather-condition series and tag each row with its year.
maindata_condition <- maindata_long |>
  filter(
    variable %in% c(
      "Clear", "Overcast", "Partially.cloudy",
      "Rain..Partially.cloudy", "Rain..Overcast", "Rain"
    )
  ) |>
  mutate(year = year(date))

# Mean value per year for every condition.
annual_mean_condition <- maindata_condition |>
  group_by(year, variable) |>
  summarise(year_mean = mean(value, na.rm = TRUE)) |>
  ungroup()

# One panel per condition: points, a connecting line and a loess trend.
annual_mean_condition_plot <- ggplot(
  data = annual_mean_condition,
  mapping = aes(x = year, y = year_mean)
) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "loess") +
  facet_wrap(~variable) +
  labs(
    y = "Count",
    title = "Annual Mean Frequency of Weather Condition",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title.x = element_blank(),
    legend.position = "none"
  )
annual_mean_condition_plot
# Monthly frequency of each weather condition ----
# Keep the six weather-condition series and tag each row with year and month.
maindata_condition <- maindata_long |>
  filter(
    variable %in% c(
      "Clear", "Overcast", "Partially.cloudy",
      "Rain..Partially.cloudy", "Rain..Overcast", "Rain"
    )
  ) |>
  mutate(
    year = year(date),
    month = month(date, label = TRUE)
  )

# Mean value for each condition/year/month cell.
monthly_mean_condition <- maindata_condition |>
  group_by(variable, year, month) |>
  summarise(month_mean = mean(value, na.rm = TRUE)) |>
  ungroup()

# Twelve default ggplot2 hues, one per month.
my_colour <- scales::hue_pal()(12)

# Grid of panels: one row per condition, one column per calendar month.
condition_plot <- monthly_mean_condition |>
  ggplot(aes(x = year, y = month_mean, colour = month)) +
  geom_point(size = 0.5) +
  geom_smooth(method = "loess") +
  facet_grid(variable ~ month) +
  scale_colour_manual(values = my_colour) +
  labs(
    y = "Count",
    title = "Monthly Frequency of Weather Condition",
    subtitle = "Data from Jan. 1990 to Nov. 2023"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    legend.position = "none"
  )
condition_plot
# Yearly summary of the continuous weather variables ----
library(DT)

# Wide data with calendar year and month label added; reused by the summary
# tables and the correlation analysis further down.
data_summary <- maindata %>%
  mutate(year = year(date), month = month(date, label = TRUE))

# Per-year minimum, maximum and mean of each variable, rounded to 3 decimals.
# Output columns are named <variable>_<statistic>.
datatable1 <- data_summary %>%
  select(
    year, temp, humidity, windspeed, cloudcover, sealevelpressure, precip
  ) %>%
  group_by(year) %>%
  summarise(across(
    everything(),
    list(
      min = ~ min(.x, na.rm = TRUE),
      max = ~ max(.x, na.rm = TRUE),
      mean = ~ mean(.x, na.rm = TRUE)
    )
  )) %>%
  mutate(across(everything(), ~ round(.x, 3)))

# Interactive table with horizontal scrolling for the wide result.
datatable(datatable1, options = list(autoWidth = FALSE, scrollX = TRUE))
# Yearly summary of the weather-condition columns ----
# Per-year minimum, maximum and mean for the columns from `Clear` to `Rain`.
# NOTE(review): both `Clear:month` and `Clear:Rain` select by column position,
# so this relies on the column order of maindata.csv -- confirm.
# summarise_at() is kept on purpose: it orders the output columns by statistic
# (all *_min, then *_max, then *_mean), which across() would change.
datatable2 <- data_summary %>%
  select(Clear:month) %>%
  group_by(year) %>%
  summarise_at(
    vars(Clear:Rain),
    list(
      min = ~ min(., na.rm = TRUE),
      max = ~ max(., na.rm = TRUE),
      mean = ~ mean(., na.rm = TRUE)
    )
  ) %>%
  # min()/max() of an all-missing group give +/-Inf and mean() gives NaN;
  # turn those into NA, then round everything to 3 decimals.
  mutate(across(
    everything(),
    ~ round(ifelse(is.finite(.x), .x, NA), 3)
  ))

# Interactive table with horizontal scrolling for the wide result.
datatable(datatable2, options = list(autoWidth = FALSE, scrollX = TRUE))
# Combine both yearly summaries into one wide table, matched on `year`;
# every row of datatable1 is kept.
final_table <- datatable1 |>
  left_join(datatable2, by = "year")

# Show the combined table with horizontal scrolling.
datatable(
  final_table,
  options = list(autoWidth = FALSE, scrollX = TRUE)
)
# The variables 'Clear', 'Overcast', 'Partially cloudy', 'Rain, Partially cloudy', 'Rain, Overcast' and 'Rain' are excluded below because they make the output look too messy. Should they be added back?
# To summarise the data by month instead of by year, use 'group_by(year, month)'.
# Heatmap (excluding 'Clear', 'Overcast', 'Partially cloudy', 'Rain, Partially cloudy', 'Rain, Overcast', 'Rain')
# Correlations between the continuous weather variables ----
# Only the six continuous measurements are used; the weather-condition
# columns are left out.
cordata <- data_summary %>%
  select(temp, humidity, windspeed, cloudcover, sealevelpressure, precip)

# Pairwise scatterplots, densities and correlations.
library(GGally)
ggpairs(cordata)

# Pearson correlation matrix computed on rows with no missing values.
cor_matrix <- cor(cordata, use = "complete.obs")

# Base-R heatmap without dendrograms or reordering.
# NOTE(review): scale = "column" standardises each column before colouring,
# so colours do not map directly to the correlation values -- consider
# scale = "none" if the raw correlations should be shown.
heatmap(cor_matrix, Colv = NA, Rowv = NA, scale = "column")
# Correlation plots with corrplot ----
library(corrplot)

# Lower-triangle colour map, variables ordered by hierarchical clustering,
# with the coefficient printed in every cell.
corrplot(
  cor_matrix,
  method = "color",
  type = "lower",
  order = "hclust",
  col = COL2("RdBu", 10),
  addCoef.col = "black",
  number.cex = 0.6,
  tl.col = "black",
  tl.srt = 45,
  cl.ratio = 0.2
)

## Significance tests for every pair of variables (95% confidence level).
# NOTE(review): cor_matrix was computed with use = "complete.obs"; confirm
# that cor.mtest() treats missing values the same way.
testRes <- cor.mtest(cordata, conf.level = 0.95)

## Same layout, but cells whose correlation is not significant are left
## blank and the diagonal is hidden.
corrplot(
  cor_matrix,
  p.mat = testRes[["p"]],
  insig = "blank",
  method = "color",
  type = "lower",
  order = "AOE",
  diag = FALSE,
  addCoef.col = "black",
  number.cex = 0.6,
  tl.col = "black"
)